Correlation Visualization
# library("GGally")

# Create data: three noisy numeric columns plus two columns derived from var1.
# No seed is set in this chunk, so the random noise differs between runs.
data <- data.frame(
  var1 = 1:100 + rnorm(100, sd = 20),
  v2 = 1:100 + rnorm(100, sd = 27),
  v3 = rep(1, 100) + rnorm(100, sd = 1)
)
# v4 is the square of var1 and v5 is its negation.
data$v4 <- data$var1^2
data$v5 <- -(data$var1^2)

# Check correlations (as scatterplots), distributions, and print the
# correlation coefficients
ggpairs(data, title = "correlogram with ggpairs()")

## Split by group
# NOTE(review): `flea` is presumably the example dataset shipped with GGally;
# confirm the package is attached before this chunk runs.
data(flea)
ggpairs(flea, columns = 2:4, mapping = ggplot2::aes(colour = species))
# library("GGally")

# Create data: three noisy numeric columns plus two columns derived from var1.
# No seed is set in this chunk, so the random noise differs between runs.
data <- data.frame(
  var1 = 1:100 + rnorm(100, sd = 20),
  v2 = 1:100 + rnorm(100, sd = 27),
  v3 = rep(1, 100) + rnorm(100, sd = 1)
)
# v4 is the square of var1 and v5 is its negation.
data$v4 <- data$var1^2
data$v5 <- -(data$var1^2)

# Check correlation between variables
# cor(data)

# Nice visualization of correlations.
# `method` is a two-element vector: the first entry is the missing-value
# handling passed to cor() ("everything"), the second is the correlation type
# ("pearson").
ggcorr(data, method = c("everything", "pearson"))
# Example taken from the help page: pair plot of four `tips` columns with
# different panel types above and below the diagonal.
data(tips, package = "reshape")

# Columns are picked by position (1, 3, 4, 2), i.e. reordered on purpose.
# NOTE(review): presumably total_bill, sex, smoker, tip -- confirm against
# the dataset.
ggpairs(
  data = tips[c(1, 3, 4, 2)],
  upper = list(continuous = "density", combo = "box_no_facet"),
  lower = list(continuous = "points", combo = "dot_no_facet")
)
Relationships can be visualized with different methods:
# Corrgram library
# library("corrgram")

# mtcars dataset is natively available in R
# head(mtcars)

# First: variables reordered (order = TRUE), panel.shade below the diagonal
# and panel.pie above it.
corrgram(
  mtcars,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "Car Mileage Data in PC2/PC1 Order"
)

# Second: same ordering, panel.ellipse below and panel.pts above the diagonal,
# with panel.minmax on the diagonal.
corrgram(
  mtcars,
  order = TRUE,
  lower.panel = panel.ellipse,
  upper.panel = panel.pts,
  text.panel = panel.txt,
  diag.panel = panel.minmax,
  main = "Car Mileage Data in PC2/PC1 Order"
)

# Third: no reordering (order = NULL) and no upper panel, so only the lower
# triangle is drawn with panel.shade.
corrgram(
  mtcars,
  order = NULL,
  lower.panel = panel.shade,
  upper.panel = NULL,
  text.panel = panel.txt,
  main = "Car Mileage Data (unsorted)"
)
# library(plotly)

# Interactive scatter-plot matrix (plotly "splom" trace) of four ADSL columns.
# NOTE(review): read_xpt() is not defined in this chunk -- presumably
# haven::read_xpt attached elsewhere; confirm.
adsl <- read_xpt("./01_Datasets/adsl.xpt")

# Stepped colour scale: each colour is listed at both ends of its interval,
# giving three flat bands (one per third of the range) instead of a gradient.
pl_colorscale <- list(
  c(0.0, "#19d3f3"),
  c(0.333, "#19d3f3"),
  c(0.333, "#e763fa"),
  c(0.666, "#e763fa"),
  c(0.666, "#636efa"),
  c(1, "#636efa")
)

fig <- adsl %>%
  plot_ly() %>%
  add_trace(
    type = "splom",
    dimensions = list(
      list(label = "Age", values = ~AGE),
      list(label = "Height", values = ~HEIGHTBL),
      list(label = "Weight", values = ~WEIGHTBL),
      list(label = "BMI", values = ~BMIBL)
    ),
    # Per-point text taken from the TRT01P column.
    text = ~TRT01P,
    marker = list(
      # Points are coloured by the integer value of TRT01PN, mapped through
      # the stepped colour scale defined above.
      color = as.integer(adsl$TRT01PN),
      colorscale = pl_colorscale,
      size = 7,
      line = list(
        width = 1,
        color = "rgb(230,230,230)"
      )
    )
  ) %>%
  layout(
    title = "ADSL Data set",
    plot_bgcolor = "rgba(240,240,240, 0.95)"
  )

# Hide the diagonal panels. FALSE is spelled out because `F` is an ordinary
# variable that can be reassigned.
fig2 <- fig %>% style(diagonal = list(visible = FALSE))
fig2
# htmlwidgets::saveWidget(as_widget(fig2), "./02_Plots/Visualization/WW Dec2023b.html")
# library(GGally)

# Pair plot of four ADSL measures, coloured and filled by SEX.
# NOTE(review): read_xpt() is not defined in this chunk -- presumably
# haven::read_xpt attached elsewhere; confirm.
adsl <- read_xpt("./01_Datasets/adsl.xpt")
adsl_data <- adsl[, c("AGE", "BMIBL", "HEIGHTBL", "WEIGHTBL", "SEX")]

# Ensure SEX is a factor (as.factor leaves an existing factor unchanged)
adsl_data[["SEX"]] <- as.factor(adsl_data[["SEX"]])

# Lower triangle: smoothed scatterplots; diagonal: densities; upper triangle:
# correlation text drawn in black.
ggpairs(
  adsl_data,
  mapping = aes(color = SEX, fill = SEX),
  lower = list(continuous = wrap("smooth", alpha = 0.3, size = 0.5)),
  diag = list(continuous = wrap("densityDiag")),
  upper = list(continuous = wrap("cor", size = 4, color = "black"))
) +
  theme_minimal() +
  theme(
    strip.text = element_text(size = 10),
    axis.text = element_text(size = 8)
  ) +
  # Manual palette for the SEX levels; the two values are matched to the
  # factor levels in order.
  scale_fill_manual(values = c("pink", "lightblue")) +
  scale_color_manual(values = c("pink", "lightblue"))